Simulation Result

Comparison of Causal Discovery Agorithms

Author

Kyuri Park

Published

January 31, 2023

1 Simualtion Design

1.1 Sparse 5 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B5sparse = matrix(c(0, 0, 0, 0, 0,
                 1, 0, 0.8, 0, 0,
                 0, 0, 0, 0.9, 0,
                 0, 0.7, 0, 0, 1.5,
                 0, 0, 0, 0, 0), 5, 5, byrow = T)

colnames(B5sparse) <- c("X1", "X2", "X3", "X4", "X5")

# specify layout
layout5 = matrix(c(0,1,
                   0,0,
                   1,-1,
                   2,0,
                   2,1),5,2,byrow = T)

true5psparse <- qgraph::qgraph(t(B5sparse), layout=layout5, labels = colnames(B5sparse), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_5psparse <- matrix(c(
       0, 2, 2, 0, 0,
       3, 0, 3, 3, 3,
       3, 3, 0, 3, 0,
       0, 3, 3, 0, 3,
       0, 2, 0, 2, 0), 5, 5, byrow = TRUE)
dimnames(truepag_5psparse) <- list(paste("X", 1:5, sep=""), paste("X", 1:5, sep=""))
plotAG(truepag_5psparse)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.2 Dense 5 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B5dense = matrix(c(0, 0, 0, 0, 0,
                    1, 0, 0.8, 0, 0,
                    0, 0, 0, 0.9, 0,
                    0, 0.7, 0, 0, 1.5,
                    1, 0, 0, 0, 0), 5, 5, byrow = T)

colnames(B5dense) <- c("X1", "X2", "X3", "X4", "X5")

true5pdense <- qgraph(t(B5dense), layout=layout5, labels = colnames(B5dense), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_5pdense <- matrix(c(
  0, 2, 2, 0, 2,
  3, 0, 3, 3, 3,
  3, 3, 0, 3, 0,
  0, 3, 3, 0, 3,
  3, 2, 0, 2, 0), 5, 5, byrow = TRUE)
dimnames(truepag_5pdense) <- list(paste("X", 1:5, sep=""), paste("X", 1:5, sep=""))
plotAG(truepag_5pdense)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.3 Sparse 10 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B10sparse = matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                  0.4, 0.8, 0, 0, 0, 0, 0, 0, 0, 0, 
                  0, 0, 0.7, 0, 0, 0.9, 0, 0, 0, 0, 
                  0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
                  0, 0, 0, 0, 0.2, 0, 0.5, 0, 0, 0, 
                  0, 0, 0, 0, 0, 0, 0, 1, 0.8, 0, 
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 10, 10, byrow = T)

dimnames(B10sparse) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))

# specify layout
layout10 = matrix(c(0,1,
                      2,1,
                      1,0,
                      2,-1,
                      3,0,
                      4, -1,
                      5, 0,
                      6, -1,
                      4, 1,
                      7, 1),10,2,byrow = T)

true10psparse <- qgraph(t(B10sparse), layout = layout10, labels = colnames(B10sparse), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_10psparse <- matrix(c(
         0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 
         3, 3, 0, 2, 0, 0, 0, 0, 0, 0, 
         0, 0, 3, 0, 3, 3, 0, 0, 0, 0, 
         0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 
         0, 0, 0, 3, 3, 0, 3, 0, 0, 0, 
         0, 0, 0, 0, 0, 2, 0, 3, 3, 0, 
         0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 
         0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
         0, 0, 0, 0, 0, 0, 0, 0, 2, 0), 10, 10, byrow = T)

dimnames(truepag_10psparse) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))
plotAG(truepag_10psparse)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.4 Dense 10 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B10dense = matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                     0.4, 0.8, 0, 0, 0, 0, 0, 0, 0, 0, 
                     0, 0, 0.7, 0, 0, 0.9, 0, 0, 0, 0, 
                     0, 0.4, 0, 1, 0, 0, 0, 0, 0, 0, 
                     0, 0, 0, 0, 0.9, 0, 0.5, 0, 0, 0, 
                     0, 0, 0, 0, 0, 0, 0, 1, 0.8, 1, 
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 10, 10, byrow = T)

colnames(B10dense) <- c(paste("X", 1:10, sep=""))

# specify layout
layout10 = matrix(c(0,1,
                    2,1,
                    1,0,
                    2,-1,
                    3,0,
                    4, -1,
                    5, 0,
                    6, -1,
                    4, 1,
                    7, 1),10,2,byrow = T)

true10pdense <- qgraph(t(B10dense), layout = layout10, labels = colnames(B10dense), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_10pdense <- matrix(c(
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 
  3, 3, 0, 2, 0, 2, 0, 0, 0, 0, 
  0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 
  0, 3, 0, 3, 0, 3, 3, 0, 0, 0, 
  0, 0, 0, 3, 3, 0, 3, 0, 0, 0, 
  0, 0, 0, 0, 0, 2, 0, 3, 3, 3, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
  0, 0, 0, 0, 0, 0, 2, 2, 2, 0), 10, 10, byrow = T)

dimnames(truepag_10pdense) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))
plotAG(truepag_10pdense)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.5 5 nodes with a LV model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B5_lv2 = matrix(c(0, 0, 0, 0, 0, 1,
                  0, 0, 0, 0, 0, 1,
                  0, 0.5, 0, 0, 0.6, 0,
                  1, 0, 0, 0, 0.5, 0,
                  0, 0, 0, 0.5, 0,0,
                  0,0,0,0,0,0), 6, 6, byrow = T)

colnames(B5_lv2) <- c("X1", "X2", "X3", "X4", "X5", "L1")
# specify layout
layout5_lv2 = matrix(c(0,1,
                       0,0,
                       1,0.5,
                       2,1,
                       2,0,
                       -1, 0.5),6,2,byrow = T)

true5p_lv2 <- qgraph(t(B5_lv2), layout=layout5_lv2, labels = colnames(B5_lv2), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_5pLV2 <- matrix(c(
  0, 2, 2, 0, 0,
  2, 0, 3, 3, 3,
  2, 3, 0, 3, 0,
  0, 3, 3, 0, 3,
  0, 2, 0, 2, 0), 5, 5, byrow = TRUE)
dimnames(truepag_5pLV2) <- list(paste("X", 1:5, sep=""), paste("X", 1:5, sep=""))
plotAG(truepag_5pLV2)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.6 10 nodes with a LV model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B10_lv = matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0.4, 0.8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0.7, 0, 0, 0.9, 0, 0, 0, 0, 0,
                  0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0.2, 0, 0.5, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 1, 0.8, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.8,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 11, 11, byrow = T)

colnames(B10_lv) <- c(paste("X", 1:10, sep=""), "L1")

# specify layout
layout10LV = matrix(c(0,1,
                      2,1,
                      1,0,
                      2,-1,
                      3,0,
                      4, -1,
                      5, 0,
                      6, -1,
                      4, 1,
                      7, 1,
                      8, 0),11,2,byrow = T)

true10pLV <- qgraph(t(B10_lv), layout = layout10LV, labels = colnames(B10_lv), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_10pLV <- matrix(c(
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 
  3, 3, 0, 2, 0, 2, 0, 0, 0, 0, 
  0, 0, 3, 0, 3, 3, 0, 0, 0, 0, 
  0, 0, 0, 3, 0, 3, 3, 0, 0, 0, 
  0, 0, 3, 3, 3, 0, 3, 0, 0, 0, 
  0, 0, 0, 0, 2, 2, 0, 3, 3, 0, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
  0, 0, 0, 0, 0, 0, 0, 2, 2, 0), 10, 10, byrow = T)

dimnames(truepag_10pLV) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))
plotAG(truepag_10pLV)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)


2 Simulation Result

See code here.
## Precision & Recall
# compute the average precision and sd
results <- list(res_ccd5psparse, res_fci5psparse, res_cci5psparse, res_ccd10psparse, res_fci10psparse, res_cci10psparse, res_ccd5pdense, res_fci5pdense, res_cci5pdense, res_ccd10pdense, res_fci10pdense, res_cci10pdense, res_ccd5pLV2, res_fci5pLV2, res_cci5pLV2, res_ccd10pLV,  res_fci10pLV, res_cci10pLV) %>% 
  # transpose df
  map(~ sjmisc::rotate_df(.x) %>%
  # add sample size (N) info
  rename_with(~paste0(.x, "N = ", rep(N, each=8)))  %>%
  # think about how to deal with NAs or do I want to define sth. else instead of NAs.
  #na.omit(.x) %>% 
  summarise(across(everything(.), list(mean = ~mean(., na.rm=T), sd = ~sd(., na.rm=T))))) %>% 
  bind_rows() %>% 
  mutate(algorithm = rep(c("ccd", "fci", "cci"), 6),
         condition = rep(c("5p_sparse", "10p_sparse", "5p_dense", "10p_dense", "5p_LV", "10p_LV"), each=3)) %>%
  # brings the algorithm and condition names first
  relocate(where(is.character), .before = where(is.numeric)) %>% 
  # convert itto a long format
  tidyr::pivot_longer(!c(algorithm, condition), names_to = "metric", values_to = "value") %>% 
  # Add sample size column (N) & clean up the column name
  mutate(N = stringr::str_extract(metric, "(?<=[N =])\\d+"),
         metric = stringr::str_replace_all(metric, "[0-9.]+|[N =]", "")) 

## Uncertainty
uncertainties <- bind_rows("ccd_5p-sparse" = uncer_ccd5psparse, "fci_5p-sparse" = uncer_fci5psparse, "cci_5p-sparse"=uncer_cci5psparse, "ccd_10p-sparse"=uncer_ccd10psparse, "fci_10p-sparse" = uncer_fci10psparse, "cci_10p-sparse" = uncer_cci10psparse, "ccd_5p-dense"=uncer_ccd5pdense, "fci_5p-dense"=uncer_fci5pdense, "cci_5p-dense"=uncer_cci5pdense, "ccd_10p-dense"=uncer_ccd10pdense, "fci_10p-dense"=uncer_fci10pdense, "cci_10p-dense"=uncer_cci10pdense, "ccd_5p-LV"=uncer_ccd5pLV2, "fci_5p-LV"=uncer_fci5pLV2, "cci_5p-LV"=uncer_cci5pLV2, "ccd_10p-LV"=uncer_ccd10pLV, "fci_10p-LV"=uncer_fci10pLV, "cci_10p-LV"=uncer_cci10pLV, .id="id") %>% 
  group_by(id) %>% 
  summarise_all(list(mean = mean, sd = sd)) %>%  
  mutate(algorithm = stringr::str_split(id, "_", simplify = T)[,1],
         condition = stringr::str_split(id, "_", simplify = T)[,2]) %>% 
  tidyr::pivot_longer(!c(algorithm, condition, id), names_to = "name", values_to = "value") %>% 
  mutate(N = stringr::str_extract(stringr::str_split(name, "_", simplify = T)[,1], "(\\d)+"),
         statistics = stringr::str_split(name, "_", simplify = T)[,2]) %>% 
  dplyr::select(-id, -name) %>%  relocate(where(is.character), .before = where(is.numeric))



## SHD
SHDs <- bind_rows("ccd_5p-sparse" = SHD_ccd5psparse, "fci_5p-sparse" = SHD_fci5psparse, "cci_5p-sparse"=SHD_cci5psparse, "ccd_10p-sparse"= SHD_ccd10psparse, "fci_10p-sparse" = SHD_fci10psparse, "cci_10p-sparse" = SHD_cci10psparse, "ccd_5p-dense"= SHD_ccd5pdense, "fci_5p-dense"=SHD_fci5pdense, "cci_5p-dense"=SHD_cci5pdense, "ccd_10p-dense"= SHD_ccd10pdense, "fci_10p-dense"=SHD_fci10pdense, "cci_10p-dense"=SHD_cci10pdense, "ccd_5p-LV"=SHD_ccd5pLV2, "fci_5p-LV"=SHD_fci5pLV2, "cci_5p-LV"=SHD_cci5pLV2, "ccd_10p-LV"=SHD_ccd10pLV, "fci_10p-LV"=SHD_fci10pLV, "cci_10p-LV"=SHD_cci10pLV, .id="id") %>% 
  group_by(id) %>% 
  summarise_all(list(mean = mean, sd = sd)) %>%  
  mutate(algorithm = stringr::str_split(id, "_", simplify = T)[,1],
         condition = stringr::str_split(id, "_", simplify = T)[,2]) %>% 
  tidyr::pivot_longer(!c(algorithm, condition, id), names_to = "name", values_to = "value") %>% 
  mutate(N = stringr::str_extract(stringr::str_split(name, "_", simplify = T)[,1], "(\\d)+"),
         statistics = stringr::str_split(name, "_", simplify = T)[,2]) %>% 
  dplyr::select(-id, -name) %>%  relocate(where(is.character), .before = where(is.numeric)) 
See code here.
## Specify my custom theme
MyTheme <-  theme(plot.title = element_blank(),
                  plot.subtitle = element_text( face = "italic"),
                  axis.text=element_text(face = "bold"),
                  legend.text = element_text(face = "bold"))

## ========================
## precision plots
## ========================
precision_plots <- c(unique(results$condition)) %>% 
  map(~
results %>% 
  filter(condition == .x & grepl("average_precision", metric)) %>% 
  tidyr::pivot_wider(names_from = metric, values_from=value) %>% 
  ggplot(aes(x= factor(N, levels = c("50", "150", "500", "1000", "5000")), y=average_precision_mean, group = algorithm, colour = algorithm, fill=algorithm)) +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  #geom_errorbar(aes(ymin=average_precision_mean-qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N)), ymax=average_precision_mean+qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N))), width=0.1) +
  geom_ribbon(aes(ymin=average_precision_mean-qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N)), ymax=average_precision_mean+qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N))), alpha=0.2,lwd=0) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  scale_fill_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  labs(x="N", y="", title = "", subtitle = .x) +
  theme_classic() + MyTheme
)

## ========================
## recall plots
## ========================
recall_plots <- c(unique(results$condition)) %>% 
  map(~
results %>% 
  filter(condition == .x & grepl("average_recall", metric)) %>% 
  tidyr::pivot_wider(names_from = metric, values_from=value) %>% 
  ggplot(aes(x= factor(N, levels = c("50", "150", "500", "1000", "5000")), y=average_recall_mean, group = algorithm, colour = algorithm, fill= algorithm)) +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  #geom_errorbar(aes(ymin=average_recall_mean-qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N)), ymax=average_recall_mean+qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N))), width=0.1) +
  geom_ribbon(aes(ymin=average_recall_mean-qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N)), ymax=average_recall_mean+qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N))), alpha=0.2,lwd=0) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  scale_fill_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  labs(x="N", y="", title = "", subtitle = .x) +
  theme_classic() + MyTheme
)

## ========================
## uncertainty plots
## ========================
## uncertainty (uncertainty rate of fci and cci are exactly the same!)
uncertainty_plots <- c(unique(uncertainties$condition)) %>% 
  map(~
uncertainties %>%
  filter(condition == .x) %>% 
  tidyr::pivot_wider(names_from = statistics, values_from=value) %>% 
  ggplot(aes(x= factor(N, levels = c("50", "150", "500", "1000", "5000")), y=mean, group = algorithm, colour = algorithm, fill=algorithm)) +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  #geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(as.numeric(N)), ymax=mean+qnorm(0.975)*sd/sqrt(as.numeric(N))), width=0.1) +
  geom_ribbon(aes(ymin=mean-qnorm(0.975)*sd/sqrt(as.numeric(N)), ymax=mean+qnorm(0.975)*sd/sqrt(as.numeric(N))), alpha=0.2,lwd=0) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  scale_fill_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  labs(x="N", y="", title = "", subtitle = .x) +
  theme_classic() + MyTheme
)

## ========================
## SHD plots
## ========================
SHD_plots <- c(unique(SHDs$condition)) %>% 
  map(~
SHDs %>%
  filter(condition == .x) %>% 
  tidyr::pivot_wider(names_from = statistics, values_from=value) %>% 
  ggplot(aes(x= factor(N, levels = c("50", "150", "500", "1000", "5000")), y=mean, group = algorithm, colour = algorithm, fill = algorithm)) +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  #geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(as.numeric(N)), ymax=mean+qnorm(0.975)*sd/sqrt(as.numeric(N))), width=0.1) +
  geom_ribbon(aes(ymin=mean-qnorm(0.975)*sd/sqrt(as.numeric(N)), ymax=mean+qnorm(0.975)*sd/sqrt(as.numeric(N))), alpha=0.2,lwd=0) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  scale_fill_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  labs(x="N", y="", title = "", subtitle = .x) +
  theme_classic() + MyTheme
)

# combine plots
# precision plot
ggarrange(plotlist = precision_plots,
                    ncol = 2, nrow = 3, common.legend = TRUE, legend = "bottom") %>%
  annotate_figure(top = text_grob("Precision", face = "bold", size = 14, family = "Palatino"))
# recall plot
ggarrange(plotlist = recall_plots,
                    ncol = 2, nrow = 3, common.legend = TRUE, legend = "bottom") %>%
  annotate_figure(top = text_grob("Recall", face = "bold", size = 14, family = "Palatino"))
# uncertainty plot
ggarrange(plotlist = uncertainty_plots,
                    ncol = 2, nrow = 3, common.legend = TRUE, legend = "bottom") %>%
  annotate_figure(top = text_grob("Uncertainty", face = "bold", size = 14, family = "Palatino"))
# shd plot
ggarrange(plotlist = SHD_plots,
                    ncol = 2, nrow = 3, common.legend = TRUE, legend = "bottom") %>% 
annotate_figure(top = text_grob("SHD", face = "bold", size = 14, family = "Palatino"))

Figure 1. Simuation results_varying sample sizes

Figure 2. Simuation results_varying sample sizes

Figure 3. Simuation results_varying sample sizes

Figure 4. Simuation results_varying sample sizes

See code here.
## ========================
## WITHOUT LV CONDITION
## ========================

## WITHOUT LV CONDITION: PRECISION
p1 <- results %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = metric, values_from=value) %>% 
ggplot(aes(x= factor(condition, levels = c("5p_sparse", "5p_dense", "10p_sparse", "10p_dense")), y=average_precision_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_precision_mean-qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N)), ymax=average_precision_mean+qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N))), width=0.1) +
  labs(x="", y="",title = "Without a Latent Variable Condition", subtitle = "Without LV_PRECISION") +
  theme_classic() + MyTheme

## WITHOUT LV CONDITION: RECALL
p2 <- results %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = metric, values_from=value) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p_sparse", "5p_dense", "10p_sparse", "10p_dense")), y=average_recall_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_recall_mean-qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N)), ymax=average_recall_mean+qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N))), width=0.1) +
  labs(x="", y="", title = "", subtitle = "Without LV_RECALL") +
  theme_classic() + MyTheme
  

## WITHOUT LV CONDITION: UNCERTAINTY
p3 <- uncertainties %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = statistics, values_from=value) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p-sparse", "5p-dense", "10p-sparse", "10p-dense")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(1000), ymax=mean+qnorm(0.975)*sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "Without LV_UNCERTAINTY") +
  theme_classic() + MyTheme
  

## WITHOUT LV CONDITION: SHD
p4 <- SHDs %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = statistics, values_from=value) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p-sparse", "5p-dense", "10p-sparse", "10p-dense")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000)) 
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(1000), ymax=mean+qnorm(0.975)*sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "Without LV_SHD") +
  theme_classic() + MyTheme

  


## ========================
## WITH LV CONDITION
## ========================

## WITH LV CONDITION: PRECISION
p5 <- results %>% 
  # exclude LV conditions
  filter(grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = metric, values_from=value) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p_LV", "10p_LV")), y=average_precision_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_precision_mean-qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N)), ymax=average_precision_mean+qnorm(0.975)*average_precision_sd/sqrt(as.numeric(N))), width=0.1) +
  labs(x="", y="", title = "With a Latent Variable Condition", subtitle = "With LV_PRECISION") +
  theme_classic() + MyTheme


## WITH LV CONDITION: RECALL
p6 <- results %>% 
  # exclude LV conditions
  filter(grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = metric, values_from=value) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p_LV", "10p_LV")), y=average_recall_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_recall_mean-qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N)), ymax=average_recall_mean+qnorm(0.975)*average_recall_sd/sqrt(as.numeric(N))), width=0.1) +
  labs(x="", y="", title = "", subtitle = "With LV_RECALL") +
  theme_classic() + MyTheme
  

## WITH LV CONDITION: UNCERTAINTY
p7 <- uncertainties %>% 
  # exclude LV conditions
  filter(grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = statistics, values_from=value) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p-LV", "10p-LV")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(as.numeric(N)), ymax=mean+qnorm(0.975)*sd/sqrt(as.numeric(N))), width=0.1) +
  labs(x="", y="", title = "", subtitle = "With LV_UNCERTAINTY") +
  theme_classic() + MyTheme
  

## WITH LV CONDITION: SHD
p8 <- SHDs %>% 
  # exclude LV conditions
  filter(grepl("LV", condition), N == "1000") %>% 
  tidyr::pivot_wider(names_from = statistics, values_from=value) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p-LV", "10p-LV")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(as.numeric(N)), ymax=mean+qnorm(0.975)*sd/sqrt(as.numeric(N))), width=0.1) +
  labs(x="", y="", title = "", subtitle = "With LV_SHD") +
  theme_classic() + MyTheme
  

# combine plots
ggarrange(p1, p5, p2, p6, p3, p7, p4, p8,
                    ncol = 2, nrow = 4, common.legend = TRUE, legend = "bottom")

Figure 5. Simuation results

See code here.
## plot the results
times %>%
  ggplot(aes(x=factor(condition, levels= c("5psparse", "5pdense", "10psparse", "10pdense", "5pLV", "10pLV")), y = log(time), col= factor(algorithm))) +
  geom_boxplot(position = "dodge",   outlier.size = 0.8, outlier.alpha = 0.2) + theme_classic() +
  # scale_x_discrete(name ="Condition",
  #                  labels=c("", "5p-sparse", "", "","5p-dense","","", "10p-sparse","","","10p-dense","","","5p-LV","","","10p-LV","")) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  labs(y = " log(ms)", x = "", title = "Algorithm Running Time", subtitle = "Time in milliseconds (ms)") +
  theme(axis.text.x = element_text(face = "bold", margin = margin(t = 13), size=10),
        legend.position="bottom",
        legend.text = element_text(face = "bold"))

Figure 6. Algorithm running time


3 Empirical Example

See code here.
# empirical data 408 rows by 26 columns (p = 26)
mcnally <- read.csv("../data/McNally.csv") 
# check the data
#glimpse(mcnally)
skimr::skim(mcnally)
Data summary
Name mcnally
Number of rows 408
Number of columns 26
_______________________
Column type frequency:
numeric 26
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
onset 0 1 1.20 1.07 0 0 1 2.00 3 ▇▆▁▆▃
middle 0 1 1.44 1.07 0 0 2 2.00 3 ▆▃▁▇▃
late 0 1 0.81 1.07 0 0 0 2.00 3 ▇▂▁▂▂
hypersom 0 1 1.01 0.99 0 0 1 2.00 3 ▇▇▁▅▂
sad 0 1 1.55 0.94 0 1 2 2.00 3 ▃▇▁▇▅
decappetite 0 1 0.49 0.73 0 0 0 1.00 3 ▇▃▁▁▁
incappetite 0 1 0.44 0.87 0 0 0 0.00 3 ▇▁▁▁▁
weightloss 0 1 0.50 0.94 0 0 0 1.00 3 ▇▁▁▁▁
weightgain 0 1 0.67 1.04 0 0 0 1.00 3 ▇▂▁▂▁
concen 0 1 1.48 0.87 0 1 1 2.00 3 ▃▇▁▇▂
guilt 0 1 1.56 1.17 0 1 1 3.00 3 ▆▇▁▃▇
suicide 0 1 0.63 0.82 0 0 0 1.00 3 ▇▅▁▂▁
anhedonia 0 1 1.27 1.05 0 0 1 2.00 3 ▇▇▁▅▅
fatigue 0 1 1.33 0.95 0 1 1 2.00 3 ▅▇▁▆▃
retard 0 1 0.66 0.81 0 0 0 1.00 3 ▇▅▁▂▁
agitation 0 1 1.10 0.93 0 0 1 2.00 3 ▅▇▁▃▂
obtime 0 1 2.95 0.89 0 2 3 4.00 4 ▁▁▆▇▇
obinterfer 0 1 2.69 0.83 0 2 3 3.00 4 ▁▁▆▇▃
obdistress 0 1 2.81 0.76 0 2 3 3.00 4 ▁▁▅▇▃
obresist 0 1 1.98 0.93 0 1 2 2.25 4 ▁▃▇▃▁
obcontrol 0 1 2.67 0.76 0 2 3 3.00 4 ▁▁▆▇▂
comptime 0 1 2.60 0.93 0 2 3 3.00 4 ▁▂▇▇▅
compinterf 0 1 2.58 0.88 0 2 3 3.00 4 ▁▂▆▇▂
compdis 0 1 2.71 0.84 0 2 3 3.00 4 ▁▁▅▇▂
compresis 0 1 2.16 0.89 0 2 2 3.00 4 ▁▂▇▅▁
compcont 0 1 2.60 0.76 0 2 3 3.00 4 ▁▁▆▇▂
See code here.
# separate dep / ocd symptoms
depression <- mcnally[,1:16]
ocd <- mcnally[,17:26]
See code here.
## run ccd algorithm on entire mcnally (discrete)
ccd_mcnally <- ccdKP(df=mcnally, dataType = "discrete") 
matccd_mcnally <- CreateAdjMat(ccd_mcnally, p = 26)

## run fci algorithm on entire mcnally (discrete)
fci_mcnally <- tetradrunner(algoId = 'fci', df = mcnally,
                          dataType = 'discrete')
matfci_mcnally <- CreateAdjMat(fci_mcnally, p = ncol(mcnally))

## run ccd on depression symptoms
ccd_mcnally_dep <- ccdKP(df=depression, dataType = "discrete") 
mat_mcnally_dep <- CreateAdjMat(ccd_mcnally_dep, p = ncol(depression))

## run fci on depression symptoms
fci_mcdep <- tetradrunner(algoId = 'fci', df = depression,
                          dataType = 'discrete')
matfci_mcdep <- CreateAdjMat(fci_mcdep, p = ncol(depression))

## run ccd on ocd symptoms
ccd_mcnally_ocd <- ccdKP(df=ocd, dataType = "discrete") 
mat_mcnally_ocd <- CreateAdjMat(ccd_mcnally_ocd, p = ncol(ocd))

## run fci on ocd symptoms
fci_mcocd <- tetradrunner(algoId = 'fci', df = ocd,
                          dataType = 'discrete')
matfci_mcocd <- CreateAdjMat(fci_mcocd, p = ncol(ocd))
See code here.
## CCD PAG for the entire Mcnally
plotPAG(ccd_mcnally, matccd_mcnally) 

Figure 7. CCD PAG on entire McNally data

See code here.
## FCI PAG for the entire Mcnally
plotPAG(fci_mcnally, matfci_mcnally) 

Figure 8. FCI PAG on entire McNally data

See code here.
## CCD PAG for the depression symptoms
plotPAG(ccd_mcnally_dep, mat_mcnally_dep) 

## FCI PAG for the depression symptoms
plotPAG(fci_mcdep, matfci_mcdep)

## CCD PAG for the ocd symptoms
plotPAG(ccd_mcnally_ocd, mat_mcnally_ocd) 

## FCI PAG for the ocd symptoms
plotPAG(fci_mcocd, matfci_mcocd)

(a) CCD PAG_depression symptoms

(b) FCI PAG_depression symptoms

(c) CCD PAG_OCD symptoms

(d) FCI PAG_OCD symptoms

Figure 9. Separate PAGs on depression and OCD symptoms

4 Session info

sessionInfo()
R version 4.2.2 (2022-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.0

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] microbenchmark_1.4.9 CCI.KP_0.1.0         MASS_7.3-58.1       
 [4] furrr_0.3.1          future_1.27.0        magrittr_2.0.3      
 [7] Rgraphviz_2.40.0     graph_1.74.0         BiocGenerics_0.42.0 
[10] DOT_0.1              rcausal_1.2.1        devtools_2.4.5      
[13] usethis_2.1.6        rJava_1.0-6          dplyr_1.0.9         
[16] purrr_0.3.4          ggpubr_0.4.0         ggplot2_3.3.6       
[19] pcalg_2.7-8          qgraph_1.9.2        

loaded via a namespace (and not attached):
  [1] backports_1.4.1     Hmisc_4.7-0         plyr_1.8.7         
  [4] igraph_1.3.5        repr_1.1.4          splines_4.2.2      
  [7] listenv_0.8.0       fastICA_1.2-3       digest_0.6.29      
 [10] htmltools_0.5.2     fansi_1.0.3         checkmate_2.1.0    
 [13] memoise_2.0.1       cluster_2.1.4       sfsmisc_1.1-14     
 [16] remotes_2.4.2       globals_0.15.1      bdsmatrix_1.3-6    
 [19] prettyunits_1.1.1   jpeg_0.1-9          colorspace_2.0-3   
 [22] skimr_2.1.4         xfun_0.31           callr_3.7.1        
 [25] crayon_1.5.1        jsonlite_1.8.0      survival_3.4-0     
 [28] glue_1.6.2          gtable_0.3.0        sjmisc_2.8.9       
 [31] V8_4.2.1            car_3.1-0           pkgbuild_1.3.1     
 [34] DEoptimR_1.0-11     ggm_2.5             abind_1.4-5        
 [37] scales_1.2.0        DBI_1.1.3           rstatix_0.7.0      
 [40] miniUI_0.1.1.1      Rcpp_1.0.9          xtable_1.8-4       
 [43] htmlTable_2.4.1     clue_0.3-63         foreign_0.8-83     
 [46] Formula_1.2-4       stats4_4.2.2        profvis_0.3.7      
 [49] htmlwidgets_1.5.4   RColorBrewer_1.1-3  lavaan_0.6-12      
 [52] ellipsis_0.3.2      farver_2.1.1        urlchecker_1.0.1   
 [55] pkgconfig_2.0.3     nnet_7.3-18         deldir_1.0-6       
 [58] utf8_1.2.2          labeling_0.4.2      tidyselect_1.1.2   
 [61] rlang_1.0.6         reshape2_1.4.4      later_1.3.0        
 [64] munsell_0.5.0       tools_4.2.2         cachem_1.0.6       
 [67] cli_3.4.1           generics_0.1.3      sjlabelled_1.2.0   
 [70] broom_1.0.0         fdrtool_1.2.17      evaluate_0.15      
 [73] stringr_1.4.1       fastmap_1.1.0       yaml_2.3.5         
 [76] processx_3.7.0      knitr_1.40          fs_1.5.2           
 [79] robustbase_0.95-0   glasso_1.11         pbapply_1.5-0      
 [82] RBGL_1.72.0         nlme_3.1-160        mime_0.12          
 [85] compiler_4.2.2      rstudioapi_0.13     curl_4.3.2         
 [88] png_0.1-7           ggsignif_0.6.3      tibble_3.1.7       
 [91] pbivnorm_0.6.0      stringi_1.7.8       highr_0.9          
 [94] ps_1.7.1            lattice_0.20-45     Matrix_1.5-1       
 [97] psych_2.2.5         vctrs_0.4.1         pillar_1.7.0       
[100] lifecycle_1.0.1     cowplot_1.1.1       insight_0.19.0     
[103] data.table_1.14.2   corpcor_1.6.10      httpuv_1.6.5       
[106] R6_2.5.1            latticeExtra_0.6-30 promises_1.2.0.1   
[109] gridExtra_2.3       parallelly_1.32.1   sessioninfo_1.2.2  
[112] codetools_0.2-18    gtools_3.9.3        assertthat_0.2.1   
[115] pkgload_1.3.0       withr_2.5.0         mnormt_2.1.0       
[118] parallel_4.2.2      rpart_4.1.19        tidyr_1.2.0        
[121] rmarkdown_2.16      carData_3.0-5       shiny_1.7.2        
[124] base64enc_0.1-3     interp_1.1-2